import scrapy
import re
from pathlib import Path

# Matches any single character that is NOT an ASCII letter, a digit, an
# underscore, a comma or a semicolon. Dashes and dots are deliberately not in
# the allowed set, so they match as well.
regexp4filename_no_dash_dot = re.compile(r"[^a-zA-Z0-9_,;]")

# Module-level collection of URLs, shared by everything in this module;
# it starts out empty.
xset = set()

class QuotesSpider(scrapy.Spider):
    """Mirror pages reachable from the SDAU listing page into local files.

    Starting from ``start_urls``, the body of every fetched page is written
    to ``<sanitized-url>.html`` in the current working directory, and every
    ``<a href>`` found on the page is scheduled for the same treatment.
    """

    name = "sdau_links"
    # ``parse`` follows every anchor it sees; restricting the allowed domains
    # keeps the crawl on the university's site instead of letting it wander
    # off onto the open web.
    allowed_domains = ["sdau.edu.cn"]
    start_urls = [
        "https://www.sdau.edu.cn/7/list.htm",
    ]

    def parse(self, response):
        """Save the response body to disk and follow the links it contains.

        Args:
            response: The downloaded response handed over by Scrapy.

        Yields:
            One request per ``<a href>`` on the page, each routed back to
            this method.
        """
        url = response.request.url
        # Skip request URLs that were already saved so the same file is not
        # written twice.
        if url in xset:
            return
        xset.add(url)

        # Replace every character outside [a-zA-Z0-9_,;] with "_" so the URL
        # can be used as a flat file name.
        xname = regexp4filename_no_dash_dot.sub('_', url)
        filename = f"{xname}.html"
        Path(filename).write_bytes(response.body)

        # Anchors frequently point at PDFs, images or archives. Such responses
        # are not text and do not support selectors, so there is nothing to
        # extract from them.
        if not isinstance(response, scrapy.http.TextResponse):
            return

        for xlink in response.css('a::attr("href")'):
            try:
                request = response.follow(xlink, self.parse)
            except ValueError:
                # A malformed href must not discard the remaining links on
                # this page.
                self.logger.debug(
                    "Skipping malformed link %r on %s", xlink.get(), url
                )
                continue
            yield request
